{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "from selenium import webdriver"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "ename": "WebDriverException",
     "evalue": "Message: 'chromedriver' executable needs to be in PATH. Please see https://sites.google.com/a/chromium.org/chromedriver/home\n",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32mD:\\python\\Anaconda3\\lib\\site-packages\\selenium\\webdriver\\common\\service.py\u001b[0m in \u001b[0;36mstart\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m     75\u001b[0m                                             \u001b[0mstderr\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlog_file\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 76\u001b[1;33m                                             stdin=PIPE)\n\u001b[0m\u001b[0;32m     77\u001b[0m         \u001b[1;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\python\\Anaconda3\\lib\\subprocess.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, encoding, errors, text)\u001b[0m\n\u001b[0;32m    774\u001b[0m                                 \u001b[0merrread\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrwrite\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 775\u001b[1;33m                                 restore_signals, start_new_session)\n\u001b[0m\u001b[0;32m    776\u001b[0m         \u001b[1;32mexcept\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\python\\Anaconda3\\lib\\subprocess.py\u001b[0m in \u001b[0;36m_execute_child\u001b[1;34m(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, unused_restore_signals, unused_start_new_session)\u001b[0m\n\u001b[0;32m   1177\u001b[0m                                          \u001b[0mos\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfspath\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcwd\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mcwd\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m \u001b[1;32melse\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1178\u001b[1;33m                                          startupinfo)\n\u001b[0m\u001b[0;32m   1179\u001b[0m             \u001b[1;32mfinally\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mFileNotFoundError\u001b[0m: [WinError 2] 系统找不到指定的文件。",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[1;31mWebDriverException\u001b[0m                        Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-2-082db34b82d3>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mbr\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mwebdriver\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mChrome\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32mD:\\python\\Anaconda3\\lib\\site-packages\\selenium\\webdriver\\chrome\\webdriver.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, executable_path, port, options, service_args, desired_capabilities, service_log_path, chrome_options, keep_alive)\u001b[0m\n\u001b[0;32m     71\u001b[0m             \u001b[0mservice_args\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mservice_args\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     72\u001b[0m             log_path=service_log_path)\n\u001b[1;32m---> 73\u001b[1;33m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mservice\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstart\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     74\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     75\u001b[0m         \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\python\\Anaconda3\\lib\\site-packages\\selenium\\webdriver\\common\\service.py\u001b[0m in \u001b[0;36mstart\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m     81\u001b[0m                 raise WebDriverException(\n\u001b[0;32m     82\u001b[0m                     \"'%s' executable needs to be in PATH. %s\" % (\n\u001b[1;32m---> 83\u001b[1;33m                         os.path.basename(self.path), self.start_error_message)\n\u001b[0m\u001b[0;32m     84\u001b[0m                 )\n\u001b[0;32m     85\u001b[0m             \u001b[1;32melif\u001b[0m \u001b[0merr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merrno\u001b[0m \u001b[1;33m==\u001b[0m \u001b[0merrno\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mEACCES\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mWebDriverException\u001b[0m: Message: 'chromedriver' executable needs to be in PATH. Please see https://sites.google.com/a/chromium.org/chromedriver/home\n"
     ]
    }
   ],
   "source": [
    "# Start a Chrome browser session driven by Selenium. The separate `chromedriver`\n",
    "# executable must be discoverable on PATH, otherwise WebDriverException is raised.\n",
    "br = webdriver.Chrome()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Navigation demo: each of the following cells loads one URL in the browser\n",
    "# session held by `br`.\n",
    "br.get('http://www.spbeen.com')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "br.get('https://www.baidu.com')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "br.get('https://www.taobao.com')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "br.get('https://www.tmall.com')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Back to the first site again.\n",
    "br.get('http://www.spbeen.com')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "233540\n"
     ]
    }
   ],
   "source": [
    "# Baseline: fetch the page with a single plain HTTP request.\n",
    "url = 'http://www.tmall.com'\n",
    "# Without a timeout requests can wait indefinitely on an unresponsive server\n",
    "# and leave the kernel stuck in this cell.\n",
    "resp = requests.get(url, timeout=10)\n",
    "# Length of the returned HTML, for comparison with the Selenium cell below.\n",
    "print(len(resp.text))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "400041\n"
     ]
    }
   ],
   "source": [
    "# WebDriver.get() returns None, so its result is deliberately not assigned:\n",
    "# binding it to `resp` would overwrite the requests Response from the previous\n",
    "# cell with None.\n",
    "br.get('http://www.tmall.com')\n",
    "# HTML of the page as it is currently loaded in the browser.\n",
    "html_str = br.page_source\n",
    "# Length of the browser-side HTML, for comparison with the requests result above.\n",
    "print(len(html_str))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<!DOCTYPE html><html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\"><head>\n",
      "\t\t<meta charset=\"UTF-8\" />\n",
      "\n",
      "\t\t<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" />\n",
      "\t\t<meta name=\"keywords\" content=\"Python, Python资讯, Python基础教程, Python学习笔记, Python电子书, Python视频教程, Python编程练习, Python Django, Python Flask, Python Tutorials, Python Video Tutorials Download\" />\n",
      "\t\t<meta name=\"description\" content=\"www.spbeen.com是一个关于Python技术栈的教程网站，分教程、项目两类，目前有Python3教程、Xpath教程、基于Python的网络爬虫、基于Python的文件备份、\" />\n",
      "\t\t<meta name=\"author\" content=\"布啦豆\" />\n",
      "\n",
      "\t\t<meta name=\"baidu_union_verify\" content=\"d4634268483255495b43735addb956bf\" />\n",
      "\n",
      "        <meta property=\"og:type\" content=\"article\" />\n",
      "        <meta property=\"og:image\" content=\"http://upload-images.jianshu.io/upload_images/174489-0434c2855ab98c3a.JPG?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240\" />\n",
      "        <meta property=\"og:url\" content=\"http://spbeen.com/course/Python3%E6%95%99%E7%A8%8B/\" />\n",
      "        <meta property=\"og:title\" content=\"Python3.x教程\" />\n",
      "        <meta property=\"og:description\" content=\"小白的Python3快速入门教程\" />\n",
      "\n",
      "\n",
      "\t\t<link rel=\"shortcut icon\" href=\"http://spbeen.oss-cn-qingdao.aliyuncs.com/static%2Ffavicon.ico\" />\n",
      "\n",
      "\t\t<title>Python技术栈 | Spbeen</title>\n",
      "        \n",
      "    \n",
      "\t\t<link rel=\"stylesheet\" type=\"text/css\" href=\"http://spbeen.oss-cn-qingdao.aliyuncs.com/static%2Fwww%2Fcss%2Ftopbottom.css\" />\n",
      "\n",
      "            <script src=\"https://cdn.bootcss.com/vue/2.1.10/vue.js\"></script>\n",
      "\n",
      "\t\t<!--<link rel=\"stylesheet\" href=\"https://cdn.jsdelivr.net/semantic-ui/2.2.4/semantic.min.css\">-->\n",
      "        <!--<script src=\"https://cdn.jsdelivr.net/semantic-ui/2.2.4/semantic.min.js\"></script>-->\n",
      "\n",
      "        <link href=\"https://cdnjs.cloudflare.com/ajax/libs/semantic-ui/2.4.1/semantic.css\" rel=\"stylesheet\" />\n",
      "        <link rel=\"stylesheet\" href=\"http://spbeen.oss-cn-qingdao.aliyuncs.com/static%2Fwww%2Fcss%2Fmarkdown-www.css\" />\n",
      "        <style>\n",
      "        #bread{background-color: white;box-shadow:0px 0px 0px white;color:black;}\n",
      "        #bread a{color:black;}\n",
      "        #mapicon{font-size: 2em;  margin-right: 0.3em;}\n",
      "        </style>\n",
      "\n",
      "\n",
      "            <script src=\"https://cdn.bootcss.com/jquery/3.3.1/jquery.min.js\"></script>\n",
      "\n",
      "            <script src=\"https://cdnjs.cloudflare.com/ajax/libs/semantic-ui/2.4.1/semantic.js\"></script>\n",
      "\n",
      "            <script src=\"https://cdn.bootcss.com/reqwest/2.0.5/reqwest.js\"></script>\n",
      "\n",
      "\n",
      "        \n",
      "\n",
      "    <script src=\"https://cdn.bootcss.com/highlight.js/9.8.0/highlight.min.js\"></script>\n",
      "    <script>hljs.initHighlightingOnLoad();</script>\n",
      "    <style>\n",
      "        p{margin: 1em 0em 1em 0em;}\n",
      "        a{color:black;}\n",
      "        a:hover{color:#1dbfbf;}\n",
      "    </style>\n",
      "\n",
      "\n",
      "\n",
      "\t\t<!--<script>(function(i,s,o,g,r,a,m){i[\"DaoVoiceObject\"]=r;i[r]=i[r]||function(){(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;a.charset=\"utf-8\";m.parentNode.insertBefore(a,m)})(window,document,\"script\",('https:' == document.location.protocol ? 'https:' : 'http:') + \"//widget.daovoice.io/widget/c824b807.js\",\"daovoice\")</script>-->\n",
      "\t\t<!--<script>-->\n",
      "\t\t<!--daovoice('init', {-->\n",
      "\t\t  <!--app_id: \"c824b807\"-->\n",
      "\t\t<!--});-->\n",
      "\t\t<!--daovoice('update');-->\n",
      "\t\t<!--</script>-->\n",
      "\t</head>\n",
      "\n",
      "\t<body>\n",
      "    \n",
      "\n",
      "\n",
      "    <div class=\"ui sidebar inverted vertical menu\">\n",
      "\n",
      "        <a href=\"/\" class=\"item bigfont\">\n",
      "            Spbeen <i class=\"home icon\"></i>\n",
      "        </a>\n",
      "        <a class=\"item\" href=\"/index\">\n",
      "          <i class=\"book icon\"></i>教程\n",
      "        </a>\n",
      "        <a class=\"item\" href=\"/index\">\n",
      "          <i class=\"book icon\"></i>项目\n",
      "        </a>\n",
      "\n",
      "\n",
      "\n",
      "    </div>\n",
      "    \n",
      "\n",
      "        <div class=\"pusher\">\n",
      "            <div class=\"ui grid\">\n",
      "                <div class=\"ui computer only sixteen wide computer column \">\n",
      "                    \n",
      "                    <div class=\"ui inverted teal borderless menu noradius\">\n",
      "                        <div class=\"ui container\">\n",
      "                            <a class=\"header item bigfont\" href=\"/\">\n",
      "                                SpBeen\n",
      "                            </a>\n",
      "\n",
      "                            <div class=\"ui simple dropdown item\">\n",
      "                                <div class=\"text\">教程</div><i class=\"dropdown icon\"></i>\n",
      "                                <div class=\"menu\">\n",
      "                                    <a class=\"item\" href=\"/course/Python3教程\">Python3教程</a>\n",
      "                                    <a class=\"item\" href=\"/course/xpath教程/\">Xpath教程</a>\n",
      "                                    <a class=\"item\" href=\"/course/Linux常用命令/\">Linux常用命令</a>\n",
      "                                </div>\n",
      "                            </div>\n",
      "\n",
      "\n",
      "                            <div class=\"ui simple dropdown item\">\n",
      "                                <div class=\"text\">项目</div><i class=\"dropdown icon\"></i>\n",
      "                                <div class=\"menu\">\n",
      "                                    <a class=\"item\" href=\"/item/基于Flask的微信娱乐机器人\">基于Flask的微信娱乐机器人</a>\n",
      "                                    <a class=\"item\" href=\"/item/基于Python的网络爬虫\">基于Python的网络爬虫</a>\n",
      "                                    <a class=\"item\" href=\"/item/基于 Python 的文件备份\">基于 Python 的文件备份</a>\n",
      "                                </div>\n",
      "                            </div>\n",
      "\n",
      "\n",
      "                            <div class=\"ui simple dropdown item\">\n",
      "                                <div class=\"text\">工具</div><i class=\"dropdown icon\"></i>\n",
      "                                <div class=\"menu\">\n",
      "                                    <a class=\"item\" href=\"/tool/request_info/\">IP请求信息查询</a>\n",
      "                                </div>\n",
      "                            </div>\n",
      "                        </div>\n",
      "                    </div>\n",
      "                    \n",
      "                </div>\n",
      "                <div class=\"ui mobile only tablet only sixteen wide mobile sixteen wide tablet column\">\n",
      "                    <div class=\"ui inverted teal borderless menu noradius mobile-tablet\">\n",
      "                        <a id=\"sidebar\" class=\"item\"><i class=\"bigger sidebar icon\"></i></a>\n",
      "\n",
      "                        <a href=\"/\" class=\"right item bigfont mobile-tablet-item\">Spbeen</a>\n",
      "\n",
      "                        <!--</div>-->\n",
      "                    </div>\n",
      "                </div>\n",
      "            </div>\n",
      "\n",
      "        <div class=\"ui container\">\n",
      "            <div id=\"bread\" class=\"ui icon message\">\n",
      "\n",
      "                    <i id=\"mapicon\" class=\"location arrow icon\"></i>\n",
      "                    <div class=\"content\">\n",
      "                    <p>\n",
      "                        </p><div class=\"ui breadcrumb\">\n",
      "\n",
      "\n",
      "                            \n",
      "                                <a class=\"section\" href=\"/\">Spbeen</a>\n",
      "                                <div class=\"divider\"> &gt; </div>\n",
      "                            \n",
      "\n",
      "                        </div>\n",
      "                    <p></p>\n",
      "                    </div>\n",
      "\n",
      "\n",
      "            </div>\n",
      "        </div>\n",
      "\n",
      "            <div class=\"content\">\n",
      "                \n",
      "<div id=\"content_show\" class=\"ui container\">\n",
      "    <div class=\"ui grid\">\n",
      "        <div class=\"sixteen wide mobile sixteen wide tablet twelve wide computer column\">\n",
      "            <div class=\"ui blue segment\">\n",
      "                <a class=\"ui red ribbon label\">教程</a>\n",
      "                <!--<h1 class=\"ui header\">教程</h1>-->\n",
      "                \n",
      "                <div class=\"ui vertical segment\">\n",
      "                    <h3 class=\"ui header\">\n",
      "                        <a href=\"/course/Python3教程\">Python3教程</a>\n",
      "                    </h3>\n",
      "                    <div class=\"ui sub\">\n",
      "                        <span class=\"\"><i class=\"user icon\"></i>\n",
      "                        布啦豆\n",
      "                        </span>\n",
      "                        <span class=\"\"><i class=\"eye icon\"></i>\n",
      "                            273120\n",
      "                        </span>\n",
      "                    </div>\n",
      "                    <p>\n",
      "                        Python3教程，从零开始，写一个通俗易懂的教程\n",
      "                    </p>\n",
      "                </div>\n",
      "                \n",
      "                <div class=\"ui vertical segment\">\n",
      "                    <h3 class=\"ui header\">\n",
      "                        <a href=\"/course/xpath教程\">xpath教程</a>\n",
      "                    </h3>\n",
      "                    <div class=\"ui sub\">\n",
      "                        <span class=\"\"><i class=\"user icon\"></i>\n",
      "                        布啦豆\n",
      "                        </span>\n",
      "                        <span class=\"\"><i class=\"eye icon\"></i>\n",
      "                            58525\n",
      "                        </span>\n",
      "                    </div>\n",
      "                    <p>\n",
      "                        xpath是一个分析html的强有力工具，Python爬虫可以方便的使用xpath来获取数据，写个针对爬虫的xpath教程\n",
      "                    </p>\n",
      "                </div>\n",
      "                \n",
      "                <div class=\"ui vertical segment\">\n",
      "                    <h3 class=\"ui header\">\n",
      "                        <a href=\"/course/Linux常用命令\">Linux常用命令</a>\n",
      "                    </h3>\n",
      "                    <div class=\"ui sub\">\n",
      "                        <span class=\"\"><i class=\"user icon\"></i>\n",
      "                        布啦豆\n",
      "                        </span>\n",
      "                        <span class=\"\"><i class=\"eye icon\"></i>\n",
      "                            14145\n",
      "                        </span>\n",
      "                    </div>\n",
      "                    <p>\n",
      "                        入门Linux必备的常用命令\n",
      "                    </p>\n",
      "                </div>\n",
      "                \n",
      "            </div>\n",
      "            <div class=\"ui yellow segment\">\n",
      "                <a class=\"ui red ribbon label\">项目</a>\n",
      "                \n",
      "                <div class=\"ui vertical segment\">\n",
      "                    <h3 class=\"ui header\">\n",
      "                        <a href=\"/item/基于Flask的微信娱乐机器人\">基于Flask的微信娱乐机器人</a>\n",
      "                    </h3>\n",
      "                    <div class=\"ui sub\">\n",
      "                        <span class=\"\"><i class=\"user icon\"></i>\n",
      "                        布啦豆\n",
      "                        </span>\n",
      "                        <span class=\"\"><i class=\"eye icon\"></i>\n",
      "                            21156\n",
      "                        </span>\n",
      "                    </div>\n",
      "                    <p>\n",
      "                        本次课程是基于 Flask Web 框架开发的娱乐级别的微信公众号后台，学习并实践 python 编程，Flask Web 开发以及微信公众平台开发机器人的基本步骤。\n",
      "                    </p>\n",
      "                </div>\n",
      "                \n",
      "                <div class=\"ui vertical segment\">\n",
      "                    <h3 class=\"ui header\">\n",
      "                        <a href=\"/item/基于Python的网络爬虫\">基于Python的网络爬虫</a>\n",
      "                    </h3>\n",
      "                    <div class=\"ui sub\">\n",
      "                        <span class=\"\"><i class=\"user icon\"></i>\n",
      "                        布啦豆\n",
      "                        </span>\n",
      "                        <span class=\"\"><i class=\"eye icon\"></i>\n",
      "                            38908\n",
      "                        </span>\n",
      "                    </div>\n",
      "                    <p>\n",
      "                        单线程python爬虫，爬模特图片，根据图片目录创建本地文件夹并将图图片保存到对应的目录中去。\n",
      "                    </p>\n",
      "                </div>\n",
      "                \n",
      "                <div class=\"ui vertical segment\">\n",
      "                    <h3 class=\"ui header\">\n",
      "                        <a href=\"/item/基于 Python 的文件备份\">基于 Python 的文件备份</a>\n",
      "                    </h3>\n",
      "                    <div class=\"ui sub\">\n",
      "                        <span class=\"\"><i class=\"user icon\"></i>\n",
      "                        布啦豆\n",
      "                        </span>\n",
      "                        <span class=\"\"><i class=\"eye icon\"></i>\n",
      "                            9921\n",
      "                        </span>\n",
      "                    </div>\n",
      "                    <p>\n",
      "                        本课程实现了一个 Python 的文件备份脚本，通过项目的实现，学习并实践 Python 的基础知识，Linux 的文件操作及简单的界面设计。\n",
      "                    </p>\n",
      "                </div>\n",
      "                \n",
      "            </div>\n",
      "        </div>\n",
      "\n",
      "        <div class=\"ui computer only four wide computer column right \">\n",
      "            <div class=\"ui red segment\">\n",
      "                <div class=\"ui vertical text menu\">\n",
      "                    <div class=\"active item\"><h4 class=\"ui headers\">最近更新</h4></div>\n",
      "                    \n",
      "                    <a class=\"item\" href=\"/p/b76a2e49-63c6-4076-a1c8-a09a653515e5\">\n",
      "                        1. ls命令\n",
      "                    </a>\n",
      "                    \n",
      "                    <a class=\"item\" href=\"/p/52e1d29d-4901-4215-bb86-7276ad076457\">\n",
      "                        2. cd命令\n",
      "                    </a>\n",
      "                    \n",
      "                    <a class=\"item\" href=\"/p/bb16e09d-511f-4728-af49-752ced909ec1\">\n",
      "                        3. 逐层检索和全局检索\n",
      "                    </a>\n",
      "                    \n",
      "                    <a class=\"item\" href=\"/p/4fef7500-1f9c-471b-a56e-af741bc16012\">\n",
      "                        4. 通过ID和Class检索\n",
      "                    </a>\n",
      "                    \n",
      "                    <a class=\"item\" href=\"/p/e4a032a4-3cf0-467b-9199-098240925504\">\n",
      "                        5. 简单的标签搜索\n",
      "                    </a>\n",
      "                    \n",
      "                    <a class=\"item\" href=\"/p/a070c2e5-acf9-49e6-9baf-b5bf6bace059\">\n",
      "                        6. 伊始\n",
      "                    </a>\n",
      "                    \n",
      "                    <a class=\"item\" href=\"/p/d91cc59a-93af-491c-b731-115703171a8f\">\n",
      "                        7. 类的继承和多态\n",
      "                    </a>\n",
      "                    \n",
      "                    <a class=\"item\" href=\"/p/036d0200-adb3-46ba-814f-8ee4ff18cd5d\">\n",
      "                        8. 类的不同方法（概念）\n",
      "                    </a>\n",
      "                    \n",
      "                    <a class=\"item\" href=\"/p/c7275ff7-522d-49ae-82d7-469d0eef296f\">\n",
      "                        9. Python线程使用和概念理解\n",
      "                    </a>\n",
      "                    \n",
      "                    <a class=\"item\" href=\"/p/7ecd0743-9e06-43d5-b32d-de0089ab4001\">\n",
      "                        10. yield和生成器是什么？\n",
      "                    </a>\n",
      "                    \n",
      "                    <a class=\"item\" href=\"/p/8edfee70-da72-484a-836a-ede361de344c\">\n",
      "                        11. 装饰器Decorator使用\n",
      "                    </a>\n",
      "                    \n",
      "                    <a class=\"item\" href=\"/p/6232e822-8ddf-4174-ada1-e9ab2f0f9249\">\n",
      "                        12. 对象的概念和使用示例\n",
      "                    </a>\n",
      "                    \n",
      "                </div>\n",
      "            </div>\n",
      "\n",
      "            <div class=\"ui violet segment\">\n",
      "                <!--<div class=\"ui header center aligned\">扫码移步其他終端</div>-->\n",
      "                <div class=\"ui header center aligned\">扫码关注微信公众号</div>\n",
      "                <div class=\"ui image\">\n",
      "                    <!--<img src=\"{-{ qrcode(url) }-}\" alt=\"\">-->\n",
      "                    <img src=\"http://static.spbeen.com/img/wechat_qrcode/qrcode_for_gh_5e6077ecc776_1280.jpg\" alt=\"\" />\n",
      "                </div>\n",
      "            </div>\n",
      "\n",
      "        </div>\n",
      "    </div>\n",
      "</div>\n",
      "\n",
      "\n",
      "            </div>\n",
      "\n",
      "            \n",
      "\n",
      "            <div class=\"ui inverted teal segment noradius\">\n",
      "                <div class=\"ui container\">\n",
      "\n",
      "                   Copyright @2016-2017 | 赣ICP备16003025号 <script type=\"text/javascript\">var cnzz_protocol = ((\"https:\" == document.location.protocol) ? \" https://\" : \" http://\");document.write(unescape(\"%3Cspan id='cnzz_stat_icon_1260149661'%3E%3C/span%3E%3Cscript src='\" + cnzz_protocol + \"s11.cnzz.com/z_stat.php%3Fid%3D1260149661' type='text/javascript'%3E%3C/script%3E\"));</script><span id=\"cnzz_stat_icon_1260149661\"><a href=\"http://www.cnzz.com/stat/website.php?web_id=1260149661\" target=\"_blank\" title=\"站长统计\">站长统计</a></span><script src=\" http://s11.cnzz.com/z_stat.php?id=1260149661\" type=\"text/javascript\"></script><script src=\"http://c.cnzz.com/core.php?web_id=1260149661&amp;t=z\" charset=\"utf-8\" type=\"text/javascript\"></script>\n",
      "\n",
      "\n",
      "                </div>\n",
      "            </div>\n",
      "\n",
      "            \n",
      "\n",
      "            \n",
      "            <script>\n",
      "                $(document).ready(function(){\n",
      "                    $(\".content table\").addClass(\"ui celled striped table\");\n",
      "\n",
      "                    $(\"#sidebar\").click(function(){\n",
      "                      $('.ui.sidebar').sidebar('toggle');\n",
      "                    });\n",
      "\n",
      "                });\n",
      "            </script>\n",
      "            \n",
      "\n",
      "    </div>\n",
      "\t\n",
      "\n",
      "</body></html>\n"
     ]
    }
   ],
   "source": [
    "# Dump the complete HTML of whatever page the browser currently has loaded.\n",
    "# NOTE(review): this cell's execution count (13) is lower than that of the cells\n",
    "# above it (14, 15), so the saved output comes from an earlier page load rather\n",
    "# than the Tmall page fetched above -- re-run the notebook in order to refresh it.\n",
    "print(br.page_source)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "660318\n"
     ]
    }
   ],
   "source": [
    "# Read the page source again without navigating anywhere. The saved length\n",
    "# (660318) is larger than in the earlier Selenium cell (400041) -- presumably the\n",
    "# page kept loading content via JavaScript in the meantime (TODO confirm).\n",
    "html_str = br.page_source\n",
    "print(len(html_str))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
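    "# requests: a single HTTP request, so the data returned is limited; fast\n",
    "# selenium: the browser loads the data, issuing multiple requests and executing JS; slow"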
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
